-------------------------------------------------------------------------------
--
--  <STRONG>Copyright &copy; 2001, 2002 by Thomas Wolf.</STRONG>
--  <BLOCKQUOTE>
--    This piece of software is free software; you can redistribute it and/or
--    modify it under the terms of the  GNU General Public License as published
--    by the Free Software  Foundation; either version 2, or (at your option)
--    any later version. This software is distributed in the hope that it will
--    be useful, but <EM>without any warranty</EM>; without even the implied
--    warranty of <EM>merchantability or fitness for a particular purpose.</EM>
--    See the GNU General Public License for  more details. You should have
--    received a copy of the GNU General Public License with this distribution,
--    see file "<A HREF="GPL.txt">GPL.txt</A>". If not, write to the Free
--    Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
--    USA.
--  </BLOCKQUOTE>
--  <BLOCKQUOTE>
--    As a special exception from the GPL, if other files instantiate generics
--    from this unit, or you link this unit with other files to produce an
--    executable, this unit does not by itself cause the resulting executable
--    to be covered by the GPL. This exception does not however invalidate any
--    other reasons why the executable file might be covered by the GPL.
--  </BLOCKQUOTE>
--
--  <AUTHOR>
--    Thomas Wolf  (TW) <E_MAIL>
--  </AUTHOR>
--
--  <PURPOSE>
--    Various string utilities not provided in the standard library. Some
--    of these also are repeated here, so that one can get all one needs
--    with a single "@with@".
--  </PURPOSE>
--
--  <NOT_TASK_SAFE>
--
--  <NO_STORAGE>
--
--  <HISTORY>
--    01-MAR-2002   TW  Initial version.
--    14-MAR-2002   TW  Added 'Count'.
--    18-MAR-2002   TW  Added 'Letters'.
--    02-MAY-2002   TW  Added 'Identifier'.
--    02-AUG-2002   TW  Added 'Replace'; corrected bug in 'To_Mixed'.
--    06-AUG-2002   TW  Replaced the body of 'Replace' with a non-recursive
--                      algorithm. The recursive one got slow on large strings
--                      with many replacements.
--  </HISTORY>
-------------------------------------------------------------------------------

with Ada.Exceptions;

package body Strings is

   ----------------------------------------------------------------------------

   package ASF renames Ada.Strings.Fixed;

   ----------------------------------------------------------------------------

   function To_Mixed (S : in String) return String
   is
      Result : String (S'Range);
      Prev   : Character := '_';
   begin
      for I in Result'Range loop
         if Prev = '_' or else Prev = '.' or else
            Ada.Strings.Maps.Is_In (Prev, Blanks)
         then
            Result (I) := To_Upper (S (I));
         else
            Result (I) := To_Lower (S (I));
         end if;
         Prev := S (I);
      end loop;
      return Result;
   end To_Mixed;

   ----------------------------------------------------------------------------

   function First_Index
     (Src : in String;
      Ch  : in Character)
     return Natural
   is
   begin
      for I in Src'Range loop
         if Src (I) = Ch then return I; end if;
      end loop;
      return 0;
   end First_Index;

   function Last_Index
     (Src : in String;
      Ch  : in Character)
     return Natural
   is
   begin
      for I in reverse Src'Range loop
         if Src (I) = Ch then return I; end if;
      end loop;
      return 0;
   end Last_Index;

   function First_Index
     (Source  : in String;
      Pattern : in String)
     return Natural
   is
      Length : constant Natural := Pattern'Length;
   begin
      if Length = 0 then raise Ada.Strings.Pattern_Error; end if;
      for I in Source'First .. Source'Last - Length + 1 loop
         if Source (I .. I + Length - 1) = Pattern then
            return I;
         end if;
      end loop;
      return 0;
   end First_Index;

   function Last_Index
     (Source   : in String;
      Pattern  : in String)
     return Natural
   is
      Length : constant Natural := Pattern'Length;
   begin
      if Length = 0 then raise Ada.Strings.Pattern_Error; end if;
      for I in reverse Source'First .. Source'Last - Length + 1 loop
         if Source (I .. I + Length - 1) = Pattern then
            return I;
         end if;
      end loop;
      return 0;
   end Last_Index;

   function Index
     (Src : in String;
      Ch  : in Character;
      Dir : in Ada.Strings.Direction := Forward)
     return Natural
   is
      use type Ada.Strings.Direction;
   begin
      if Dir = Forward then
         return First_Index (Src, Ch);
      else
         return Last_Index (Src, Ch);
      end if;
   end Index;

   function Index
     (Source  : in String;
      Pattern : in String;
      Dir     : in Ada.Strings.Direction := Forward)
     return Natural
   is
      use type Ada.Strings.Direction;
   begin
      if Dir = Forward then
         return First_Index (Source, Pattern);
      else
         return Last_Index (Source, Pattern);
      end if;
   end Index;

   ----------------------------------------------------------------------------

   function Count
     (Src : in String;
      Ch  : in Character)
     return Natural
   is
      N : Natural := 0;
   begin
      for I in Src'Range loop
         if Src (I) = Ch then N := N + 1; end if;
      end loop;
      return N;
   end Count;

   function Count
     (Source  : in String;
      Pattern : in String)
     return Natural
   is
      Length : constant Natural := Pattern'Length;
   begin
      if Length = 0 then raise Ada.Strings.Pattern_Error; end if;
      if Length > Source'Length then return 0; end if;
      declare
         Stop : constant Natural := Source'Last - Length + 1;
         I    :          Natural := Source'First;
         N    :          Natural := 0;
      begin
         while I <= Stop loop
            if Source (I .. I + Length - 1) = Pattern then
               N := N + 1; I := I + Length;
            else
               I := I + 1;
            end if;
         end loop;
         return N;
      end;
   end Count;

   ----------------------------------------------------------------------------

   function Is_Blank
     (Ch : in Character)
     return Boolean
   is
   begin
      return Ada.Strings.Maps.Is_In (Ch, Blanks);
   end Is_Blank;

   ----------------------------------------------------------------------------

   function Is_In
     (Set : in Ada.Strings.Maps.Character_Set;
      Ch  : in Character)
     return Boolean
   is
   begin
      return Ada.Strings.Maps.Is_In (Ch, Set);
   end Is_In;

   ----------------------------------------------------------------------------

   function Trim
     (S    : in String;
      Side : in Ada.Strings.Trim_End := Both)
     return String
   is
   begin
      case Side is
         when Both =>
            return ASF.Trim (S, Blanks, Blanks);
         when Left =>
            return ASF.Trim (S, Blanks, Null_Set);
         when Right =>
            return ASF.Trim (S, Null_Set, Blanks);
      end case;
   end Trim;

   ----------------------------------------------------------------------------

   procedure Get_String
     (S        : in     String;
      From, To :    out Natural;
      Delim    : in     Character := '"';
      Escape   : in     Character := No_Escape)
   is
   begin
      From := S'First; To := 0;
      while From <= S'Last and then S (From) /= Delim loop
         From := From + 1;
      end loop;
      if From = S'Last then
         return;
      elsif From > S'Last then
         From := 0; return;
      end if;
      To := Skip_String (S (From .. S'Last), Delim, Escape);
   end Get_String;

   function In_String
     (S      : in String;
      Delim  : in Character := '"';
      Escape : in Character := No_Escape)
     return Boolean
   is
      I, From, To : Natural;
   begin
      I := S'First;
      loop
         Get_String (S (I .. S'Last), From, To, Delim, Escape);
         exit when From = 0 or else To = 0;
         I := To + 1;
      end loop;
      return From /= 0 and then To = 0;
   end In_String;

   function Skip_String
     (S      : in String;
      Delim  : in Character := '"';
      Escape : in Character := No_Escape)
     return Natural
   is
      From : Natural;
      To   : Natural := 0;
   begin
      if S'Last < S'First then return 0; end if;
      From := S'First + 1;
      if Escape = No_Escape then
         To := First_Index (S (From .. S'Last), Delim);
      else
         To := From;
         if Escape /= Delim then
            declare
               Escaped : Boolean := False;
            begin
               --  Be careful: a double occurrence of the escape is a literal
               --  escape!
               while To <= S'Last loop
                  if S (To) = Escape then
                     Escaped := not Escaped;
                  elsif S (To) = Delim and then not Escaped then
                     return To;
                  else
                     Escaped := False;
                  end if;
                  To := To + 1;
               end loop;
            end;
         else
            --  Delimiters are escaped by doubling!
            while To < S'Last loop
               if S (To) = Delim then
                  if S (To + 1) /= Delim then
                     return To;
                  end if;
                  To := To + 2;
               else
                  To := To + 1;
               end if;
            end loop;
            if To <= S'Last and then S (To) = Delim then return To; end if;
         end if;
         To := 0;
      end if;
      return To;
   end Skip_String;

   function Quote
     (S      : in String;
      Delim  : in Character;
      Escape : in Character)
     return String
   is
   begin
      if Escape = No_Escape then
         return S;
      else
         declare
            Result : String (1 .. S'Length * 2);
            J      : Natural := 1;
         begin
            if Escape = Delim then
               for I in S'Range loop
                  if S (I) = Delim then
                     Result (J) := Escape; J := J + 1;
                  end if;
                  Result (J) := S (I); J := J + 1;
               end loop;
            else
               for I in S'Range loop
                  if S (I) = Delim or else S (I) = Escape then
                     Result (J) := Escape; J := J + 1;
                  end if;
                  Result (J) := S (I); J := J + 1;
               end loop;
            end if;
            return Result (1 .. J - 1);
         end;
      end if;
   end Quote;

   function Unquote
     (S      : in String;
      Delim  : in Character;
      Escape : in Character)
     return String
   is
      Result : String (1 .. S'Length);
      I      : Natural := S'First;
      J      : Natural := Result'First;
   begin
      if Escape = Delim then
         while I <= S'Last loop
            if S (I) = Delim and then
               I < S'Last and then S (I + 1) = Delim
            then
               I := I + 1;
            end if;
            Result (J) := S (I);
            J := J + 1; I := I + 1;
         end loop;
      else
         while I <= S'Last loop
            if S (I) = Escape and then
               I < S'Last and then
               (S (I + 1) = Escape or else S (I + 1) = Delim)
            then
               I := I + 1;
            end if;
            Result (J) := S (I);
            J := J + 1; I := I + 1;
         end loop;
      end if;
      return Result (Result'First .. J - 1);
   end Unquote;

   function Unquote_All
     (S      : in String;
      Quotes : in Ada.Strings.Maps.Character_Set;
      Escape : in Character := No_Escape)
     return String
   is
      I, J : Natural;
      Ch   : Character;
   begin
      I := ASF.Index (S, Quotes);
      if I = 0 then return S; end if; --  No string found
      if Escape = No_Escape then
         Ch := S (I);
      else
         Ch := Escape;
      end if;
      J := Skip_String (S (I .. S'Last), S (I), Ch);
      if J = 0 then
         --  Unterminated string!
         return S;
      end if;
      return S (S'First .. I) &
             Unquote (S (I + 1 .. J - 1), S (I), Ch) & S (I) &
             Unquote_All (S (J + 1 .. S'Last), Quotes, Escape);
   end Unquote_All;

   ----------------------------------------------------------------------------

   function Is_Prefix
     (Source : in String;
      Prefix : in String)
     return Boolean
   is
   begin
      return
        Source (Source'First ..
                Natural'Min (Source'Last,
                             Source'First + Prefix'Length - 1)) =
        Prefix;
   end Is_Prefix;

   function Is_Suffix
     (Source : in String;
      Suffix : in String)
     return Boolean
   is
   begin
      if Suffix'Length > Source'Length then return False; end if;
      return
        Source (Source'Last - Suffix'Length + 1 .. Source'Last) = Suffix;
   end Is_Suffix;

   ----------------------------------------------------------------------------

   ID_Chars : constant Ada.Strings.Maps.Character_Set :=
     Ada.Strings.Maps."or" (Letters, Ada.Strings.Maps.To_Set ("0123456789_"));

   function Identifier
     (S : in String)
     return Natural
   is
   begin
      if S'Last < S'First or else not Is_In (Letters, S (S'First)) then
         return 0;
      end if;
      declare
         I : Natural := S'First + 1;
      begin
         while I <= S'Last and then Is_In (ID_Chars, S (I)) loop
            I := I + 1;
         end loop;
         return I - 1;
      end;
   end Identifier;

   ----------------------------------------------------------------------------

   function Next_Non_Blank
     (S : in String)
     return Natural
   is
   begin
      for I in S'Range loop
         if not Is_Blank (S (I)) then return I; end if;
      end loop;
      return 0;
   end Next_Non_Blank;

   ----------------------------------------------------------------------------

   function Next_Blank
     (S : in String)
     return Natural
   is
   begin
      for I in S'Range loop
         if Is_Blank (S (I)) then return I; end if;
      end loop;
      return 0;
   end Next_Blank;

   ----------------------------------------------------------------------------

   function Replace
     (Source : in String;
      What   : in String;
      By     : in String)
     return String
   is
      --  Speed optimized for both small and large strings, with few and many
      --  replacements.
      N : constant Natural := Count (Source, What);
   begin
      if N = 0 then return Source; end if;
      declare
         By_Length   : constant Natural := By'Length;
         What_Length : constant Natural := What'Length;
         Result      : String (1 ..
                               Source'Length - N * (What_Length - By_Length));
         --  Now if only the compiler was smart enough to allocate 'Result'
         --  directly on the secondary stack instead of using 'alloca'...
         --  Note that 'Result' has exactly the length it needs!
         I, J, K     : Natural;
      begin
         J := 1;
         I := Source'First;
         while I <= Source'Last loop
            K := First_Index (Source (I .. Source'Last), What);
            if K = 0 then
               Result (J .. Result'Last) := Source (I .. Source'Last);
               --  J := Result'Last + 1;
               I := Source'Last + 1;
            else
               Result (J .. J + (K - I) - 1) := Source (I .. K - 1);
               J := J + (K - I);
               Result (J .. J + By_Length - 1) := By;
               J := J + By_Length;
               I := K + What_Length;
            end if;
         end loop;
         return Result;
      end;
   end Replace;

   ----------------------------------------------------------------------------
   --  Wildcard string matching

   package ASM renames Ada.Strings.Maps;

   procedure Raise_Unterminated_Set
     (Pattern : in String)
   is
   begin
      Ada.Exceptions.Raise_Exception
        (Illegal_Pattern'Identity,
         "Unterminated character set: '" & Pattern & "'");
   end Raise_Unterminated_Set;

   procedure Raise_Backslash_At_End
   is
   begin
      Ada.Exceptions.Raise_Exception
        (Illegal_Pattern'Identity,
         "'\' at end of pattern (must be followed by a character)");
   end Raise_Backslash_At_End;

   --  generic...
   function Wildcard_Match
     (Pattern      : in String;
      Text         : in String)
     return Boolean
   is

      procedure Parse_Set
        (Pattern      : in     String;
         Set          :    out ASM.Character_Set;
         Stop         :    out Natural;
         Inverted     :    out Boolean)
      is
         use type ASM.Character_Set;

         Lower : Character;
         Start : Natural;
      begin
         Set := Null_Set;
         if Pattern'Last <= Pattern'First + 1 then
            --  Even if it is "[]", it is unterminated!
            Raise_Unterminated_Set (Pattern);
         end if;
         Start := Pattern'First + 1;
         Inverted :=
           Set_Inverter /= No_Set_Inverter and then
           Pattern (Start) = Set_Inverter;
         if Inverted then Start := Start + 1; end if;
         --  Go looking for the end of the set.
         if Start <= Pattern'Last and then Pattern (Start) = ']' then
            --  This ']' is to be part of the set!
            Stop := Start + 1;
         else
            Stop := Start;
         end if;
         Stop := First_Index (Pattern (Stop .. Pattern'Last), ']');
         if Stop <= Start then
            Raise_Unterminated_Set (Pattern);
         end if;
         --  The set is defined by 'Pattern (Start .. Stop-1)'
         while Start < Stop loop
            Lower := Pattern (Start);
            Start := Start + 1;
            if Pattern (Start) = '-' and then (Start + 1 < Stop) then
               --  We have a range.
               Start := Start + 1;
               if Pattern (Start) < Lower then
                  Ada.Exceptions.Raise_Exception
                    (Illegal_Pattern'Identity,
                     "Upper bound of range in character set is smaller " &
                     "than lower bound: '" &
                     Lower & '-' & Pattern (Start) & "'");
               end if;
               Set :=
                 Set or
                 ASM.To_Set (ASM.Character_Range'(Lower, Pattern (Start)));
               Start := Start + 1;
            else
               Set :=
                 Set or ASM.To_Set (ASM.Character_Range'(Lower, Lower));
            end if;
         end loop;
      end Parse_Set;

      Match_Impossible : exception;

      function Internal_Match
        (Pattern      : in String;
         Text         : in String)
        return Boolean
      is
         Pattern_I : Natural := Pattern'First;
         Text_I    : Natural := Text'First;
         Switch    : Character;
      begin
         while Text_I <= Text'Last loop
            if (Pattern_I > Pattern'Last) then
               --  The text is not yet exhausted, hence it can't match!
               return False;
            end if;
            Switch := Pattern (Pattern_I);
            if Has_Escape and then Switch = '\' then
               --  Literal match with next pattern character
               if Pattern_I = Pattern'Last then
                  Raise_Backslash_At_End;
               end if;
               Pattern_I := Pattern_I + 1;
               if Pattern (Pattern_I) /= Text (Text_I) then
                  return False;
               end if;
               Pattern_I := Pattern_I + 1;
               Text_I    := Text_I + 1;
            elsif Has_Char_Set and then Switch = '[' then
               --  Character set
               declare
                  Set      : ASM.Character_Set;
                  Inverted : Boolean;
               begin
                  Parse_Set (Pattern (Pattern_I .. Pattern'Last),
                             Set, Pattern_I, Inverted);
                  if Inverted = ASM.Is_In (Text (Text_I), Set) then
                     return False;
                  end if;
               end;
               Pattern_I := Pattern_I + 1;
               Text_I    := Text_I + 1;
            elsif Switch = Any_One then
               if Zero_Or_One then
                  --  Null matches are OK!
                  declare
                     Result : Boolean;
                  begin
                     --  First try the null match, i.e. advance the
                     --  pattern, but not the text.
                     Result :=
                       Internal_Match
                         (Pattern (Pattern_I + 1 .. Pattern'Last),
                          Text (Text_I .. Text'Last));
                     if Result then return Result; end if;
                     --  No match: try the any-one match: advance both.
                  end; --  block
               end if;
               --  Match any character.
               Pattern_I := Pattern_I + 1;
               Text_I    := Text_I + 1;
            elsif Switch = Zero_Or_More then
               --  First, collate sequences of '*'s (and maybe '?'s).
               while Pattern_I < Pattern'Last and then
                     (Pattern (Pattern_I + 1) = Zero_Or_More or
                      (Zero_Or_One and then
                       Pattern (Pattern_I + 1) = Any_One))
               loop
                  Pattern_I := Pattern_I + 1;
               end loop;
               --  '*' at the end of the pattern matches anything:
               if Pattern_I >= Pattern'Last then return True; end if;
               --  Find next possible match, if any.
               declare
                  Next : Natural;
                  P_I  : Natural := Pattern_I + 1;
               begin
                  if Pattern (P_I) = Any_One then
                     --  Can match any one character: don't skip.
                     Next := Text_I;
                  elsif Has_Char_Set and then Pattern (P_I) = '[' then
                     --  Skip to the next character matching the set
                     declare
                        Set      : ASM.Character_Set;
                        Dummy    : Natural;
                        Inverted : Boolean;
                     begin
                        Parse_Set (Pattern (P_I .. Pattern'Last),
                                   Set, Dummy, Inverted);
                        if Inverted then
                           Next := ASF.Index (Text (Text_I .. Text'Last),
                                              Set, Ada.Strings.Outside);
                        else
                           Next := ASF.Index (Text (Text_I .. Text'Last),
                                              Set, Ada.Strings.Inside);
                        end if;
                     end; --  block
                  else
                     --  Skip ahead to the next matching character
                     declare
                        Ch : Character := Pattern (P_I);
                     begin
                        if Has_Escape and then Ch = '\' then
                           if P_I = Pattern'Last then
                              Raise_Backslash_At_End;
                           end if;
                           Ch := Pattern (P_I + 1);
                        end if;
                        Next :=
                          First_Index (Text (Text_I .. Text'Last), Ch);
                     end;
                  end if;
                  if Next = 0 then raise Match_Impossible; end if;
                  --  No match was possible, so abort the whole thing.
                  --  (This can be done safely because any other match
                  --  for any previous '*' could only require an even
                  --  later match of the character following the current
                  --  '*'-sequence - but we know that this is impossible.
                  --  Therefore, any other matching for previous '*'s is
                  --  bound to fail and hence we may give up. Without
                  --  this, the algorithm would have quadratic behaviour
                  --  in some failure cases, e.g. the "-adobe" negative
                  --  case in the test program.)
                  declare
                     Result : Boolean;
                  begin
                     Result :=
                       Internal_Match (Pattern (P_I .. Pattern'Last),
                                       Text (Next .. Text'Last));
                     --  This recursion is limited by the number of '*'
                     --  sequences in the pattern.
                     if Result then return Result; end if;
                  end;
                  --  This star couldn't match this minimal text
                  --  sequence; try to extend it. Note: 'Pattern_I' has
                  --  not been incremented here.
                  Text_I := Next + 1;
               end; --  block
            else
               --  Literal match between the pattern and the text
               if Pattern (Pattern_I) /= Text (Text_I) then
                  return False;
               end if;
               Pattern_I := Pattern_I + 1;
               Text_I    := Text_I + 1;
            end if;
         end loop;
         --  Skip remaining '*'s (and maybe '?'s) at the end of the pattern.
         while Pattern_I <= Pattern'Last and then
               (Pattern (Pattern_I) = Zero_Or_More or
                (Zero_Or_One and then Pattern (Pattern_I) = Any_One))
         loop
            Pattern_I := Pattern_I + 1;
         end loop;
         return Pattern_I > Pattern'Last;
      end Internal_Match;

   begin --  Match_G
      return Internal_Match (Pattern, Text);
   exception
      when Match_Impossible =>
         return False;
   end Wildcard_Match;

   function Instance is new Wildcard_Match;

   function Match
     (Pattern : in String;
      Text    : in String)
     return Boolean
     renames Instance;

end Strings;
